import scrapy

from proxy.items import ProxyItem

class ProxyspiderSpider(scrapy.Spider):
	"""Spider that scrapes free proxy listings from kuaidaili.com.

	Crawls https://www.kuaidaili.com/free/inha/1 through /free/inha/<page>,
	yielding one ProxyItem per proxy-table row.
	"""

	name = 'proxySpider'
	# NOTE(review): attribute name is a typo for "domains"; kept as-is so any
	# external code referencing ProxyspiderSpider.doamins keeps working.
	doamins = 'www.kuaidaili.com'
	allowed_domains = [doamins]
	# Page number the crawl starts from.
	current_page = 1
	protocol = 'https://'
	start_urls = [protocol + doamins + "/free/inha/" + str(current_page)]
	# Total number of pages to crawl.
	page = 10

	def parse(self, response):
		"""Parse one listing page.

		Yields a ProxyItem for every complete table row, then schedules a
		request for the next page until ``self.page`` pages have been visited.
		"""
		rows = response.xpath(
			'/html/body/div/div[4]/div[2]/div/div[2]/table/tbody/tr')
		for row in rows:
			cells = [td.get().strip() for td in row.xpath('.//td/text()')]
			# Skip header/short/empty rows. The original code indexed
			# cells[0..6] unconditionally (its trIndex > 1 filter was dead
			# code: trIndex was incremented before the first check, so the
			# condition was always true), which could raise IndexError.
			if len(cells) < 7:
				continue
			yield ProxyItem(
				ip=cells[0],
				port=cells[1],
				anonymity=cells[2],
				type=cells[3],
				location=cells[4],
				responseSpeed=cells[5],
				lastVerify=cells[6],
				isDeleted=0,
			)
		if self.current_page < self.page:
			self.current_page += 1
			next_url = self.protocol + self.doamins + "/free/inha/" + str(self.current_page)
			# Use the spider logger instead of a bare print().
			self.logger.debug('Following pagination to %s', next_url)
			yield scrapy.Request(url=next_url, callback=self.parse)